import plotly
plotly.offline.init_notebook_mode()
import requests as r
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import seaborn as sns
import time
import matplotlib.pyplot as plt
%matplotlib inline
sns.set()
import warnings
warnings.simplefilter('ignore') #ignore the warnings, not the errors
# Load the two listing exports that the rest of the script analyses.
df1 = pd.read_csv('pap350.csv')
df2 = pd.read_csv('seloger_complet.csv')

# Tag every row with its origin so the merged frame can be split by source.
df1['Source'] = 'particulier'
df2['Source'] = 'agence'

# 'District' is removed from df1 only, to make the two frames uniform;
# 'Unnamed: *' and 'index' are the columns created by the CSV export.
df1.drop(columns=['District', 'Unnamed: 0', 'index'], inplace=True)
df2.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], inplace=True)

# DataFrame.append was removed in pandas 2.0; pd.concat stacks df1 then df2
# and ignore_index=True renumbers the rows 0..n-1.
df = pd.concat([df1, df2], ignore_index=True)
df
import plotly.express as px

# Donut-style pie chart of Price_per_m2 split by Source.
pie_options = dict(
    values='Price_per_m2',
    names='Source',
    title='Lovely pie chart',
    height=400,
    hole=.3,
)
fig = px.pie(df, **pie_options)
# Print the share and the source name inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()
# dashboard DF1 PAP: 2x2 overview of df1 (the 'particulier' source)
_, axes = plt.subplots(2, 2, figsize=(18, 10))

axes[0, 0].set_title('Histogram', size=15)
axes[0, 1].set_title('Boxplot', size=15)
axes[1, 0].set_title('Price to m2 / Surface', size=15)
axes[1, 1].set_title('Price / Surface', size=15)

# The histogram shows Price_per_m2, so its x axis is labelled accordingly
# (it used to read 'Surface in m2').
axes[0, 0].set(xlabel='Price per m2', ylabel='Quantity')
axes[0, 1].set(ylabel='Surface in m2')
axes[1, 0].set(xlabel='Square meter price', ylabel='Surface in m2')
axes[1, 1].set(xlabel='Price in €', ylabel='Surface in m2')

axes[0, 0].hist(df1['Price_per_m2'], bins=20)
axes[0, 1].boxplot(df1['Surface_m2'])
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=df1.Price_per_m2, y=df1.Surface_m2, ax=axes[1, 0])
sns.scatterplot(x=df1.Price, y=df1.Surface_m2, ax=axes[1, 1]);
# dashboard DF2 SELOGER: 2x2 overview of df2 (the 'agence' source)
_, axes = plt.subplots(2, 2, figsize=(18, 10))

axes[0, 0].set_title('Histogram', size=15)
axes[0, 1].set_title('Boxplot', size=15)
axes[1, 0].set_title('Price to m2 / Surface', size=15)
axes[1, 1].set_title('Price / Surface', size=15)

# The histogram shows Price_per_m2, so its x axis is labelled accordingly
# (it used to read 'Surface in m2').
axes[0, 0].set(xlabel='Price per m2', ylabel='Quantity')
axes[0, 1].set(ylabel='Surface in m2')
axes[1, 0].set(xlabel='Square meter price', ylabel='Surface in m2')
axes[1, 1].set(xlabel='Price in €', ylabel='Surface in m2')

axes[0, 0].hist(df2['Price_per_m2'], bins=20)
axes[0, 1].boxplot(df2['Surface_m2'])
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=df2.Price_per_m2, y=df2.Surface_m2, ax=axes[1, 0])
sns.scatterplot(x=df2.Price, y=df2.Surface_m2, ax=axes[1, 1]);
# TODO: make markers smaller, or set axis limits on graphs 2, 3, 4
# Restrict the merged listings to surfaces strictly below 70 m2.
df_m2_lessthan_70 = df.query('Surface_m2 < 70')

# dashboard: 2x2 overview of the filtered listings
_, axes = plt.subplots(2, 2, figsize=(18, 10))
# NOTE(review): hspace/wspace are assigned but never handed to any layout
# call, so they currently have no effect on the figure.
hspace = 1
wspace = 1

axes[0, 1].set_title('Histogram', size=15)
axes[0, 0].set_title('Boxplot', size=15)
axes[1, 1].set_title('Price of m2 / Surface', size=15)
axes[1, 0].set_title('Price / Surface', size=15)

axes[0, 1].set(xlabel='Price per m2', ylabel='Quantity')
axes[0, 0].set(ylabel='Surface in m2')
axes[1, 1].set(xlabel='Square meter price', ylabel='Surface in m2')
axes[1, 0].set(xlabel='Price in €', ylabel='Surface in m2')

axes[0, 1].hist(df_m2_lessthan_70['Price_per_m2'], bins=20)
axes[0, 0].boxplot(df_m2_lessthan_70['Surface_m2'])
# Both coordinates now come from the filtered frame (x used to be taken from
# the unfiltered df), and are passed by keyword as seaborn >= 0.12 requires.
sns.scatterplot(x=df_m2_lessthan_70.Price_per_m2, y=df_m2_lessthan_70.Surface_m2, ax=axes[1, 1])
sns.scatterplot(x=df_m2_lessthan_70.Price, y=df_m2_lessthan_70.Surface_m2, ax=axes[1, 0]);
import plotly.express as px

# Price against surface for the merged listings, one colour per Source.
axis_labels = {"x": "Price in €", "y": "Surface in m2"}
fig = px.scatter(
    x=df.Price,
    y=df.Surface_m2,
    color=df.Source,
    labels=axis_labels,
)
fig.show()
# interesting lines
import plotly.express as px

# Price per m2 against surface, one colour per Source.
axis_labels = {"x": "Price of m2 in €", "y": "Surface in m2"}
fig = px.scatter(
    x=df.Price_per_m2,
    y=df.Surface_m2,
    color=df.Source,
    labels=axis_labels,
)

# Small, semi-transparent markers with a thin outline; applied only to
# traces drawn in 'markers' mode.
marker_style = {
    'size': 6,
    'opacity': 0.4,
    'line': {'width': 0.5, 'color': 'DarkSlateGrey'},
}
fig.update_traces(marker=marker_style, selector={'mode': 'markers'})
fig.show()
# boxplot answer
import plotly.express as px

# Price per m2 against surface with default marker styling, one colour per Source.
axis_labels = {"x": "Price of m2 in €", "y": "Surface in m2"}
fig = px.scatter(
    x=df.Price_per_m2,
    y=df.Surface_m2,
    color=df.Source,
    labels=axis_labels,
)
fig.show()
# Compare the median price per m2 and the median surface of the two sources.
median_agency = df.query('Source=="agence"').Price_per_m2.median()
median_pap = df.query('Source=="particulier"').Price_per_m2.median()

# Relative gap, measured against the agency median and kept to two decimals.
overpay = round(((median_agency - median_pap) / median_agency), 2)
# Round to the nearest whole percent before converting to int: a bare
# int(overpay*100) truncates, so e.g. 0.29*100 == 28.999... became 28.
# The sign is flipped so a higher 'particulier' median is reported as positive.
overpay_percent = -int(round(overpay * 100))

median_m2_agency = df.query('Source=="agence"').Surface_m2.median()
median_m2_pap = df.query('Source=="particulier"').Surface_m2.median()
# Whole m2 by which the agency median surface exceeds the 'particulier' one.
less_m2 = int(median_m2_agency - median_m2_pap)

print(f"Median price per m2 of agency in Paris is: {median_agency!s}")
print(f"Median price per m2 of individual in Paris is: {median_pap!s}")
print(" ")
print(f"By renting from a individual, you will overpay on average {overpay_percent!s}% of the price")
print(f"And for {less_m2!s} less m2!")
import plotly.express as px

# Overlay of price per m2 against surface: df1 drawn almost opaque, df2 drawn
# very faint on top of it.
fig = px.scatter(x=df1.Price_per_m2, y=df1.Surface_m2, opacity=0.9, labels={"x": "Price of m2 in €", "y": "Surface in m2"})
# Only three colour components are given, so the string is written as rgb()
# rather than rgba(); transparency is already set through `opacity`.
fig.update_traces(marker_color='rgb(255,0,255)')

fig2 = px.scatter(x=df2.Price_per_m2, y=df2.Surface_m2, opacity=0.1)
fig2.update_traces(marker_color='rgb(255,100,100)')
x = fig2.data[0]  # the scatter trace of fig2 (its first trace)
fig.add_trace(x)
fig.show()
import plotly.express as px

# Overlay of total price against surface: df1 drawn almost opaque, df2 drawn
# very faint on top of it.
# The x axis shows Price, so it is labelled "Price in €" (it used to carry
# the per-m2 label from the previous chart).
fig = px.scatter(x=df1.Price, y=df1.Surface_m2, opacity=0.9, labels={"x": "Price in €", "y": "Surface in m2"})
# Only three colour components are given, so the string is written as rgb()
# rather than rgba(); transparency is already set through `opacity`.
fig.update_traces(marker_color='rgb(255,0,255)')

fig2 = px.scatter(x=df2.Price, y=df2.Surface_m2, opacity=0.1)
fig2.update_traces(marker_color='rgb(255,100,100)')
x = fig2.data[0]  # the scatter trace of fig2 (its first trace)
fig.add_trace(x)
fig.show()
import plotly.express as px

# Price per m2 against surface, coloured by Zone, with box plots drawn in
# both margins.
scatter_options = dict(
    color=df.Zone,
    marginal_x="box",
    marginal_y="box",
    labels={"x": "Price for m2 in €", "y": "Surface in m2"},
)
fig = px.scatter(x=df.Price_per_m2, y=df.Surface_m2, **scatter_options)
fig.show()
import webbrowser
import plotly.graph_objs as go
import plotly.express as px

# Interactive Price / Surface scatter of the merged listings; clicking a
# point is meant to open that listing's link.
fig = go.FigureWidget(layout={'hovermode': 'closest'})
scatter = fig.add_scatter(
    x=df.Price,
    y=df.Surface_m2,
    mode='markers',
    fillcolor='azure',
)
data = fig.data[0]

fig.update_xaxes(title_text="Price in €")
fig.update_yaxes(title_text="Surface in m2")

# Borrow the OLS trendline that plotly express computes and add it here.
fig2 = px.scatter(x=df.Price, y=df.Surface_m2, trendline="ols")
trendline = fig2.data[1]  # second trace, first one is scatter
fig.add_trace(trendline)


def do_click(trace, points, state):
    """Open the Link of the first clicked point in a new browser tab."""
    if not points.point_inds:
        return
    ind = points.point_inds[0]
    webbrowser.open_new_tab(df.Link.iloc[ind])


data.on_click(do_click)
fig
import webbrowser
import plotly.graph_objs as go
import plotly.express as px

# Interactive Price / Surface scatter with df2 and df1 as separate traces,
# plus an OLS trendline fitted on the merged frame.
fig = go.FigureWidget(layout={'hovermode': 'closest'})
scatter = fig.add_scatter(x=df2.Price, y=df2.Surface_m2, mode='markers', fillcolor='azure')
data = fig.data[0]
# Only the df2 trace exists at this point, so only it receives this colour.
# Three colour components are given, hence rgb() rather than rgba().
fig.update_traces(marker_color='rgb(255,100,100)')

fig2 = px.scatter(x=df.Price, y=df.Surface_m2, trendline="ols")
trendline = fig2.data[1]  # second trace, first one is scatter
fig.add_trace(trendline)

fig3 = px.scatter(x=df1.Price, y=df1.Surface_m2)
fig3.update_traces(marker_color='rgb(255,0,0)')
pap = fig3.data[0]
fig.add_trace(pap)

fig.update_xaxes(title_text="Price in €")
fig.update_yaxes(title_text="Surface in m2")


def do_click(trace, points, state):
    """Open the Link of the first clicked df2 point in a new browser tab."""
    if points.point_inds:
        ind = points.point_inds[0]
        # The handler is attached to the df2 trace, so the point index must
        # be resolved against df2; the merged df starts with df1's rows and
        # would return the wrong listing.
        # NOTE(review): assumes df2 has the same Link column as df — confirm.
        link = df2.Link.iloc[ind]
        webbrowser.open_new_tab(link)


# NOTE(review): only the df2 trace gets a click handler; the df1 ('pap')
# trace added above has none.
data.on_click(do_click)
fig
# TODO: change the opacity of a boxplot graph to clearly see the PAP points while keeping the boxplots
# TODO: in the click tool, separate pap and seloger by color (OR make the hover text tell whether it's particulier or agence)
# TODO: hypothesis testing
# FIXME: clicking on the charts does not work